library(tidyverse)
library(streamgraph)
library(viridis)
library(hrbrthemes)
library(plotly)
library(ggalt)
library(jpeg)
library(ggplot2)
library(patchwork)
library(dplyr)
library(ggimage)
library(ggpubr)
library(jpeg)
library(stringr)
library(prismatic)
library(Cairo)
library(scales)
library(extrafont)
library(hrbrthemes)
library(GGally)
library(viridis)
# Homework Assignment #3
# Data Viz
# Option 1
# How have Salmon fisheries in Alaska changed since 1985 in terms of yield and regulations?
# Variables:
# Year: time range from 1985 to 2023
# Area: Alaska fishing region
# Species name: salmon species name
# Number of fish estimated: catch numbers of salmon
# Permit count: how many permits were distributed
# Vessel count: how many vessels were used that year
# Import Required Libraries
# Hand-Drawn Plot
# Embed the hand-drawn draft of the planned plots.
# NOTE(review): this is an absolute, machine-specific path, so it only
# resolves on the original author's computer; consider a project-relative
# path.
knitr::include_graphics("/Users/katebecker/Documents/Bren/Winter/240/final/Becker-eds240-HW4/drafts.jpeg")

# Data Read in
# Load the catch records, then clean the column names for easier wrangling.
alaska <- read_csv("data/Alaska-Statewide.csv") %>%
  janitor::clean_names()

# Salmon Image Import
# Images downloaded from the internet are saved as JPEGs in this repo.
# The directory is declared once so that moving the project only requires
# changing a single line.
# NOTE(review): this is an absolute, machine-specific directory; consider a
# project-relative path.
image_dir <- "/Users/katebecker/Documents/Bren/Winter/240/final/eds240-visualizations/images"

# Full path to each species' image file.
chinook <- file.path(image_dir, "kspencer_chinook.jpg")
coho <- file.path(image_dir, "kspencer_coho.jpg")
sockeye <- file.path(image_dir, "sockeye-salmon.jpg")
pink <- file.path(image_dir, "kspencer_pink.jpg")
chum <- file.path(image_dir, "kspencer_chum.jpg")

# Read each image in as a native raster.
chinook_img <- readJPEG(chinook, native = TRUE)
chum_img <- readJPEG(chum, native = TRUE)
sockeye_img <- readJPEG(sockeye, native = TRUE)
coho_img <- readJPEG(coho, native = TRUE)
pink_img <- readJPEG(pink, native = TRUE)

# Visualization #1
# Order in which the species are laid out on the (flipped) discrete axis.
species_order <- c(
  "Sockeye Salmon", "Pink Salmon", "Chum Salmon", "Coho Salmon",
  "Chinook Salmon"
)

# Bar colour per species. The values are named so that each colour stays
# attached to its species instead of depending on level order.
species_fill <- c(
  "Chinook Salmon" = "#C66264",
  "Chum Salmon" = "#FD7C6E",
  "Coho Salmon" = "#FF7799",
  "Pink Salmon" = "#EC8E5E",
  "Sockeye Salmon" = "#F67280"
)

# One row per species pairing it with its own image file, so every bar is
# labelled with the matching fish rather than the chinook image.
species_images <- tibble(
  species_name = c(
    "Chinook Salmon", "Chum Salmon", "Coho Salmon", "Pink Salmon",
    "Sockeye Salmon"
  ),
  image = c(chinook, chum, coho, pink, sockeye)
)

# Horizontal bar chart of the estimated salmon catch per species for one year.
#
# data:       catch records with `year`, `species_name` and
#             `number_of_fish_estimated` columns.
# catch_year: the single year to plot; also used in the plot title.
#
# Returns a ggplot object.
plot_salmon_catch <- function(data, catch_year) {
  data |>
    filter(year == catch_year) |>
    ggplot(aes(
      x = species_name,
      y = number_of_fish_estimated,
      fill = species_name
    )) +
    geom_col() +
    # The images get their own one-row-per-species data so each picture is
    # drawn once, near the right-hand edge of the shared catch axis.
    geom_image(
      data = species_images,
      aes(x = species_name, y = 58000000, image = image),
      size = 0.2,
      inherit.aes = FALSE
    ) +
    scale_fill_manual(values = species_fill) +
    # Both years share the same axis range so the two panels are comparable.
    scale_y_continuous(labels = scales::comma, limits = c(0, 60000000)) +
    scale_x_discrete(limits = species_order) +
    coord_flip() +
    theme_bw() +
    theme(legend.position = "none") +
    labs(
      x = "",
      y = "Catch",
      title = paste("Salmon Catch in", catch_year)
    )
}

salmon1985 <- plot_salmon_catch(alaska, 1985)
salmon1985

Salmon2023 <- plot_salmon_catch(alaska, 2023)
Salmon2023

# Final Visualization
# Stack the two years vertically for a direct comparison.
gridExtra::grid.arrange(salmon1985, Salmon2023, ncol = 1)

# Visualization #2
# Data Wrangling
alaska$area <- as.factor(alaska$area)

# Mean (rounded to a whole fish) and total estimated catch per area.
# Only rows missing the catch estimate are dropped: a blanket na.omit() would
# also discard rows that merely lack an unrelated column (e.g. a permit or
# vessel count) and so bias the mean and the sum.
summary_stats <- alaska %>%
  drop_na(number_of_fish_estimated) %>%
  group_by(area) %>%
  summarise(
    mean_fish = round(mean(number_of_fish_estimated), digits = 0),
    sum_fish = sum(number_of_fish_estimated)
  )

# Number of records per area (all rows, including those with missing values).
Nrs <- alaska %>%
  count(area)

# One row per area: mean catch, total catch and record count.
summary_all <- left_join(summary_stats, Nrs, by = "area")

# Final Visualization
# Color choice: preview the candidate palette (printed output kept below).
prismatic::color(c(
  "#6C5B7B", "#C06C84", "#B85265", "#C66264", "#FD7C6E", "#FF7F50",
  "#FF7799", "#EC8E5E", "#FF91A4", "#F67280", "#F8B195"
))
#> <colors>
#> #6C5B7BFF #C06C84FF #B85265FF #C66264FF #FD7C6EFF #FF7F50FF #FF7799FF #EC8E5EFF #FF91A4FF #F67280FF #F8B195FF
# ggplot2
# Circular bar chart of the total estimated catch per area, with a point
# marking each area's mean catch on a dashed guide segment.
summary_all %>%
  # Wrap long area names and order the areas by total catch once, so every
  # layer shares exactly the same x variable and ordering.
  mutate(area_label = reorder(str_wrap(area, 11), sum_fish)) %>%
  ggplot() +
  # Custom panel grid: evenly spaced rings every 20 million fish.
  geom_hline(
    yintercept = seq(0, 80000000, by = 20000000),
    color = "lightgrey"
  ) +
  # Bars: total catch per area; fill encodes the same (numeric) total.
  geom_col(
    aes(x = area_label, y = sum_fish, fill = sum_fish),
    position = "dodge2",
    show.legend = TRUE,
    alpha = .9
  ) +
  # Continuous fill gradient and legend title for the total catch.
  scale_fill_gradientn(
    "Fish Sum",
    colours = c("#6C5B7B", "#C06C84", "#B85265", "#C66264", "#FD7C6E")
  ) +
  # Point: mean catch per area.
  geom_point(
    aes(x = area_label, y = mean_fish),
    size = 3,
    color = "gray12"
  ) +
  # Dashed guide segment per area, running from 0 to 20 million fish.
  geom_segment(
    aes(x = area_label, y = 0, xend = area_label, yend = 20000000),
    linetype = "dashed",
    color = "gray12"
  ) +
  labs(
    x = "Salmon Catches by Region",
    y = "Estimated Catch"
  ) +
  coord_polar() +
  theme_bw()

# Visualization #3
# Preview the catch records grouped by area (printed output kept below).
alaska %>%
  group_by(area)
#> # A tibble: 1,902 × 9
#> # Groups: area [12]
#> year area species_name number_of_fish_estim…¹ landed_weight_lbs
#> <dbl> <fct> <chr> <dbl> <dbl>
#> 1 1985 Alaska Peninsula… Chinook Sal… 30210 588718
#> 2 1985 Alaska Peninsula… Chum Salmon 2029532 14119298
#> 3 1985 Alaska Peninsula… Coho Salmon 348632 2721395
#> 4 1985 Alaska Peninsula… Pink Salmon 4434160 18028534
#> 5 1985 Alaska Peninsula… Sockeye Sal… 4743247 25833839
#> 6 1985 Arctic/Kotzebue Chinook Sal… 63 1106
#> 7 1985 Arctic/Kotzebue Chum Salmon 521373 4528131
#> 8 1985 Bristol Bay Chinook Sal… 120412 2191157
#> 9 1985 Bristol Bay Chum Salmon 1068143 6835198
#> 10 1985 Bristol Bay Coho Salmon 162822 1324352
#> # ℹ 1,892 more rows
#> # ℹ abbreviated name: ¹number_of_fish_estimated
#> # ℹ 4 more variables: whole_weight_lbs <dbl>, permit_count <dbl>,
#> # processor_count <dbl>, vessel_count <dbl>
# Parallel coordinate plot across year, estimated catch, permit count and
# vessel count, with lines coloured by area.
# Columns are selected by name rather than by position so the plot keeps
# working if the column order of `alaska` ever changes.
ggparcoord(
  alaska,
  columns = c(
    "year", "number_of_fish_estimated", "permit_count", "vessel_count"
  ),
  groupColumn = "area",
  alphaLines = 0.2
) +
  theme(axis.text = element_text(size = 7)) +
  labs(title = "Parallel Coordinate Plot for Salmon Fisheries in Alaska")

# 2 potential visualizations I would like to explore:
# https://www.usgs.gov/apps/ecosheds/pitdata/
# https://www.flickr.com/photos/kushwahasantosh/5925487529
# Written Responses:
# - What challenges did you encounter or anticipate encountering as you continue to build / iterate on your visualizations in R?
# One of the main challenges I observed is the problems that accompany untidy data. The cleaning and wrangling that different graphs require are unique to each one, and it can be difficult to complete that prior to starting graph creation. I also found it difficult to find datasets online that can be bound with others. Data frames are all constructed differently, making it difficult to aggregate them.
# - What ggplot extension tools / packages do you need to use to build your visualizations? Are there any that we haven't covered in class that you'll be learning how to use for your visualizations?
# There were quite a few packages I needed to build my visualizations, such as streamgraph, hrbrthemes, ggalt, jpeg, ggimage, ggpubr, stringr, prismatic, Cairo, scales, and GGally, just to name a few.
# - What feedback do you need from the instructional team and / or your peers to ensure that your intended message is clear?
# I think the most productive feedback would be whether or not my visualizations are enough to answer my research questions. I found that during this process I was more so looking for patterns and relationships rather than thinking about the communication of these visualizations to the public. I would also like aesthetic feedback!